package cn.datawin.spider;

import java.io.BufferedReader;
import java.io.BufferedWriter;
import java.io.File;
import java.io.FileFilter;
import java.io.FileInputStream;
import java.io.FileNotFoundException;
import java.io.FileOutputStream;
import java.io.IOException;
import java.io.InputStreamReader;
import java.io.OutputStreamWriter;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.List;

import cn.datawin.spider.seletor.Html;



/**
 * Extracts exam questions from saved {@code .htm} pages and writes them to a
 * CSV file.
 *
 * <p>The input is either a single {@code .htm} file or a directory whose
 * direct children ending in {@code .htm} are processed. The output file is the
 * input path with {@code ".csv"} appended. For every page one row is written
 * containing the text of the {@code p}, {@code div.exam_select} and
 * {@code .exam_yellow} elements found under {@code div.exam_info}.
 *
 * <p>Instances can be handed to a thread or executor through {@link Runnable}.
 */
public class HtmlFileParser implements  Runnable{
	
	/** Extension (case-sensitive) a file name must end with to be parsed. */
	private static final String HTM_SUFFIX = ".htm";
	
	/** Input path: a single {@code .htm} file or a directory containing them. */
	File file;
	
	/** Files collected by {@link #readHtml()} and consumed by {@link #parse()}. */
	List<File> files = new ArrayList<File>();
	
	/** Charset name used when reading the HTML files. */
	String encode = "UTF-8";
	
	/** CSV file that {@link #parse()} writes to. */
	File outFile;
	
	/**
	 * Creates an unconfigured parser; {@code file} and {@code outFile} must be
	 * assigned before {@link #parse()} or {@link #run()} is called.
	 */
	public HtmlFileParser() {
	}
	
	/**
	 * Creates a parser for the given input path.
	 *
	 * @param file path of a {@code .htm} file or of a directory containing
	 *             {@code .htm} files; the output is written to this path with
	 *             {@code ".csv"} appended
	 */
	public HtmlFileParser(String file) {
		this.file = new File(file);
		outFile = new File(this.file.getPath()+".csv");
	}
	
	/**
	 * Collects the {@code .htm} files to parse into {@link #files}.
	 *
	 * <p>If the input path is itself a {@code .htm} file only that file is
	 * added; otherwise the direct children of the input path are scanned.
	 * A path that cannot be listed (missing, unreadable, or a regular file
	 * with another extension) contributes no files.
	 *
	 * @throws IllegalStateException if no input path has been configured
	 */
	void readHtml(){
		if(file == null){
			throw new IllegalStateException("No input path configured");
		}
		if(isHtmFile(file)){
			files.add(file.getAbsoluteFile());
			return;
		}
		File [] children =  file.listFiles(new FileFilter() {
			@Override
			public boolean accept(File pathname) {
				return isHtmFile(pathname);
			}
		});
		
		// listFiles returns null when the path is not a directory or an I/O
		// error occurs; treat that as "nothing to parse" instead of failing.
		if(children != null){
			files.addAll(Arrays.asList(children));
		}
	}
	
	/** Tells whether the path is a regular file whose name ends in {@code .htm}. */
	private static boolean isHtmFile(File candidate){
		return candidate.isFile() && candidate.getName().endsWith(HTM_SUFFIX);
	}
	
	/**
	 * Parses every collected HTML file and writes the CSV output: a header
	 * row followed by one row per file.
	 *
	 * @throws IOException if the output file cannot be opened or written
	 * @throws CloneNotSupportedException if the parsed document cannot be cloned
	 * @throws IllegalStateException if the input or output path is not configured
	 */
	void parse() throws IOException, CloneNotSupportedException{
		readHtml();
		if(outFile == null){
			throw new IllegalStateException("No output file configured");
		}
		// NOTE(review): the writer uses the platform default charset while the
		// input is read with `encode`; confirm which encoding consumers of the
		// CSV expect before changing it.
		BufferedWriter out = new BufferedWriter(new OutputStreamWriter(new FileOutputStream(outFile))); 
		try{
			out.append("提干,选项,答案\r\n");
			for(File f : files){
				String text = readLine(f);
				Html html = new Html(text);
				save(html, out);
			}
		}finally{
			// Always release the file handle, even when a page fails to parse.
			out.close();
		}
	}
	
	/**
	 * Writes one CSV row for the given document.
	 *
	 * <p>Each field is wrapped in double quotes and embedded double quotes are
	 * doubled so that quotes, commas or line breaks in the page text cannot
	 * break the row. The trailing comma after the last field is kept so the
	 * row layout stays the same as before.
	 *
	 * @param html parsed page to extract the fields from
	 * @param out  writer the row is appended to
	 * @throws IOException if writing fails
	 * @throws CloneNotSupportedException if the selected node cannot be cloned
	 */
	void save(Html html, BufferedWriter out) throws IOException, CloneNotSupportedException{
		Html se =  (Html) html.$("div.exam_info");
		// Each query runs on a fresh clone; presumably $ alters its receiver -
		// TODO confirm against Html.
		String tm =  ((Html) se.clone()).$("p").text();
		String xd = ((Html) se.clone()).$("div.exam_select").text();
		String da = ((Html) se.clone()).$(".exam_yellow").text();
		out.append(quote(tm)).append(",")
			.append(quote(xd)).append(",")
			.append(quote(da)).append(",")
			.append("\r\n");
	}
	
	/**
	 * Wraps a value in double quotes, doubling any embedded double quote.
	 * A {@code null} value is rendered as the text {@code null}, which is what
	 * {@code Writer.append(null)} would have written.
	 */
	private static String quote(String value){
		return "\"" + String.valueOf(value).replace("\"", "\"\"") + "\"";
	}
	
	
	/**
	 * Reads a whole file using the {@link #encode} charset and returns its
	 * lines concatenated without line separators.
	 *
	 * <p>Reading is best effort: on failure the error is reported on standard
	 * error and whatever was read so far (possibly nothing) is returned.
	 *
	 * @param _file file to read
	 * @return the file content with line breaks removed; never {@code null}
	 */
	public String readLine(File _file) {
		StringBuilder buff = new StringBuilder();
		String s = null;
		BufferedReader reader = null;
		try {
			reader = new BufferedReader(new InputStreamReader(new FileInputStream(_file), encode));
			while ((s = reader.readLine()) != null) {
				buff.append(s);
			}
		} catch (Exception e) {
			System.err.println("Failed to read " + _file);
			e.printStackTrace();
		} finally {
			try {
				if (reader != null)
					reader.close();
			} catch (IOException ignored) {
				// Nothing useful can be done if closing a reader fails.
			}
		}
		return buff.toString();
	}
	
	
	/**
	 * Runs {@link #parse()}. Failures do not propagate to the calling thread
	 * but are reported on standard error instead of being silently dropped.
	 */
	@Override
	public void run() {
		try{
			parse();
		}catch(Exception e){
			System.err.println("Failed to parse " + file);
			e.printStackTrace();
		}
	}
	
	
	/**
	 * Command-line entry point; currently does nothing.
	 *
	 * @param args ignored
	 */
	public static void main(String[] args) {
	
	}
	

}
